from urllib import request
import  urllib.parse
import  urllib.error
from bs4 import BeautifulSoup
import csv
import io
import sys
import time

# Re-wrap stdout's underlying byte stream so that everything this script
# prints is encoded as GB18030 rather than the stream's default encoding.
# NOTE(review): presumably aimed at a Chinese-locale Windows console - confirm.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
# Earlier single-page prototype (fetch, decode as UTF-8, parse), left disabled;
# get_url_book performs the same steps for an arbitrary page URL.
# resp = request.urlopen('https://book.douban.com/tag/%E4%BA%92%E8%81%94%E7%BD%91?type=S')
# html_data = resp.read().decode('utf-8')
# # print(html_data)
# soup = BeautifulSoup(html_data,'html.parser')
# HTTP request headers carrying a desktop-Chrome-style User-Agent string.
# The literal is split only for line length; the resulting value is unchanged.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1;WOW64) AppleWebkit/537.36 '
        '(KHTML,like Geckop) Chrome/55.0.2883.87 Safari/537.36'
    ),
}


def get_url_book(url):
    """Download one listing page and return the book records parsed from it.

    Args:
        url: Absolute URL of the listing page to fetch.

    Returns:
        Whatever ``read`` returns for the parsed page (a list of record dicts).

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` included).
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    print(url)
    # Send the module-level ``headers`` so the request carries the browser
    # User-Agent instead of urllib's default one.
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(req) as resp:
        web_data = resp.read().decode('utf-8')
    soup = BeautifulSoup(web_data, 'html.parser')
    # Pause between consecutive page fetches.
    time.sleep(2)
    return read(soup)

def catch(data):
    """Return *data* unchanged.

    The argument is evaluated by the caller before this function is entered,
    so an exception raised while computing it can never be intercepted here.
    The previous try/except/finally around a plain assignment was therefore
    dead code and has been removed; the observable result is the same.

    Args:
        data: Any value.

    Returns:
        The very same object that was passed in.
    """
    return data

def _tag_string(tag):
    """Return ``tag.string``, or ``''`` when the tag or its string is missing."""
    if tag is None:
        return ''
    text = tag.string
    return '' if text is None else text


def _parse_book_info(info):
    """Build one record dict from a single ``div.info`` element."""
    link = info.find('a')
    return {
        # Title comes from the first link's ``title`` attribute.
        'name': link.get('title', '') if link is not None else '',
        'dec': _tag_string(info.find('p')),
        'pub': _tag_string(info.find('div', class_='pub')).replace('\n', ''),
        # Entries without a rating span get the placeholder text instead.
        'rating_nums': (
            _tag_string(info.find('span', class_='rating_nums'))
            or '(少于10人评价)'
        ),
        'people': _tag_string(info.find('span', class_='pl')).replace('\n', ''),
    }


def read(soup):
    """Extract book records from a parsed listing page.

    Looks for the first ``ul.subject-list`` element, then reads every
    ``div.info`` found inside its ``li.subject-item`` children.

    Args:
        soup: Parsed page exposing the BeautifulSoup ``find``/``find_all`` API.

    Returns:
        A list of dicts with the keys ``name``, ``dec``, ``pub``,
        ``rating_nums`` and ``people``. The list is empty when the page has
        no ``ul.subject-list``. Missing fields become ``''``, except
        ``rating_nums`` which falls back to the placeholder text.
    """
    subject_list = soup.find('ul', class_='subject-list')
    if subject_list is None:
        # No listing on this page: nothing to extract, and nothing to crash on.
        return []

    book_list = []
    for item in subject_list.find_all('li', class_='subject-item'):
        for info in item.find_all('div', class_='info'):
            # A fresh dict per info block keeps records from aliasing each other.
            book_list.append(_parse_book_info(info))
    return book_list

# Crawl 49 listing pages of the Douban "互联网" tag (the ``start`` offset
# advances by 20 per request) and collect every parsed record.
# The accumulator is called ``books`` rather than ``list`` so the builtin
# ``list`` is not shadowed for the rest of the module.
books = []
for page in range(49):
    url = 'https://book.douban.com/tag/%E4%BA%92%E8%81%94%E7%BD%91?start={}&type=T'.format(page * 20)
    books.extend(get_url_book(url))
    print(url)
print(books)

# Write all records to book.csv, encoded as GB18030.
# newline='' lets the csv module control line endings itself.
with open("book.csv", "w", encoding='gb18030', newline='') as datacsv:
    csvwriter = csv.writer(datacsv, dialect="excel")
    csvwriter.writerow(["名称","描述","作者/出版社/价格","评分","评分人数"])
    csvwriter.writerows(
        [book['name'], book['dec'], book['pub'], book['rating_nums'], book['people']]
        for book in books
    )

print("ok")